After processing the data from the PFAS Analytic Tools, I ended up with a list of ~35,000 potential/known PFAS sources. To account for differences in census tract size, I followed the EJI method for quantifying the proximity to a point source, which is as follows:
cut out the part of the polygon buffer that doesn’t overlap with the census tract polygon.
take the area of the polygon buffer that remains and divide it by the total area of the census tract polygon.
The following sections walk through this step by step for all census tracts within Bexar County, TX.
ppps_sa <- eji_census_tracts_transformed %>%
filter(county == "Bexar County" & state == "TX")
ppps_sa_select <- ppps_sa %>%
filter(geoid == "48029980100")
pfas_sa <- ppps_salvatore_tract %>%
filter(city == "SAN ANTONIO" & state.x == "TX")
tm_shape(ppps_sa) +
tm_polygons(alpha = 0.8) +
tm_shape(pfas_sa) +
tm_dots()
The map is centered around the census tract with a geoid: “48029980100”, but the map is interactive, so you can zoom out and look at other census tracts in Bexar County as well.
ppps_1mi_buffer_sa <- pfas_sa %>%
st_buffer(1609.34)
tm_shape(ppps_sa, bbox = st_bbox(ppps_sa_select)) +
tm_polygons(alpha = 0.8) +
tm_shape(ppps_1mi_buffer_sa) +
tm_polygons(alpha = 0.8, col = "red") +
tm_shape(pfas_sa) +
tm_dots()
ppps_buffer_overlap_sa <- ppps_1mi_buffer_sa %>%
st_union() %>%
st_make_valid()
tm_shape(ppps_sa, bbox = st_bbox(ppps_sa_select)) +
tm_polygons() +
tm_shape(ppps_buffer_overlap_sa) +
tm_polygons(alpha=0.8, col = "turquoise") +
tm_shape(ppps_1mi_buffer_sa) +
tm_dots(alpha=0.8)
buffers_sa <- st_intersection(ppps_buffer_overlap_sa, ppps_sa_select)
tm_shape(ppps_sa, bbox = ppps_sa_select) +
tm_polygons() +
tm_shape(buffers_sa) +
tm_polygons(alpha=0.8, col = "turquoise", border.col = "red") +
tm_shape(ppps_1mi_buffer_sa) +
tm_dots(alpha=0.8)
\[ \text{perc_area_pfas}_i = \dfrac{\sum\text{area}(\text{PFAS point source with 1 mile buffer that overlaps with census tract i})}{\text{area}(\text{census tract i})}*100\% \]
buffers_sa <- list()
perc_calc_sa <- list()
for(i in unique(ppps_sa$geoid)){
ppps_sa_select2 <- ppps_sa %>%
filter(geoid == i)
buffers_sa[[i]] <- st_intersection(ppps_buffer_overlap_sa, ppps_sa_select2)
perc_calc_sa[[i]] <- st_area(buffers_sa[[i]])/st_area(ppps_sa_select2)
}
buffers_diff_sa <- buffers_sa[lapply(buffers_sa, length) > 0] %>%
bind_rows() %>%
pivot_longer(names_to = "geoid", values_to = "geometry", everything()) %>%
st_as_sf()
perc_tract_pfas_sa <- perc_calc_sa[lapply(perc_calc_sa, length) > 0] %>%
bind_rows() %>%
pivot_longer(names_to = "geoid", values_to = "perc_area_pfas", everything())
ppps_sa_perc <- ppps_sa %>%
left_join(perc_tract_pfas_sa, by = "geoid") %>%
mutate(perc_area_pfas = units::drop_units(perc_area_pfas))
tm_shape(ppps_sa_perc, bbox = st_bbox(ppps_sa_select)) +
tm_polygons("perc_area_pfas", palette = "PuRd") +
tm_shape(buffers_diff_sa) +
tm_polygons(border.col = "turquoise", alpha = 0) +
tm_shape(ppps_1mi_buffer_sa) +
tm_dots(alpha=0.8)
Summary stats for the percent of a census tract that falls within a 1-mile radius of a PFAS point source.
gtsummary::tbl_summary(ppps_perc_df,
include = "perc_area_pfas_0",
type = list(everything() ~ "continuous2"),
statistic = list(
everything() ~ c("{mean} ({var})",
"{median} ({p25}, {p75})",
"{p90}",
"{p95}",
"{p99}")
)) %>%
tbl_theme(gt = TRUE)
Characteristic | N = 73,056 |
|---|---|
perc_area_pfas_0 | |
Mean (Variance) | 0.04 (0.03) |
Median (IQR) | 0.00 (0.00, 0.00) |
90% | 0.00 |
95% | 0.19 |
99% | 0.99 |
It’s kind of hard to see what is going on in census tracts that do
overlap with a PFAS point source buffer because there are so many census
tracts that don’t…
here’s the distribution of the variable
with and without those census tracts that have no PFAS point source:
perc_area_plot <- ggplot(ppps_perc_df, aes(x = perc_area_pfas)) +
geom_histogram(aes(y = after_stat(density)),
color = "black",
fill = "white",
bins = 30) +
geom_density(fill = "red", alpha = 0.25) +
labs(title = "Distribution of perc_area_pfas for\ncensus tracts with a PFAS point source",
x = "percent of census tract that falls within a\n1-mile radius of a PFAS point source (not including tracts with 0%)")
perc_area_plot0 <- ggplot(ppps_perc_df, aes(x = perc_area_pfas_0)) +
geom_histogram(aes(y = after_stat(density)),
color = "black",
fill = "white",
bins = 30) +
geom_density(fill = "red", alpha = 0.25) +
labs(title = "Distribution of perc_area_pfas for\nall census tracts in EJI",
x = "percent of census tract that falls within a\n1-mile radius of a PFAS point source")
cowplot::plot_grid(perc_area_plot0, perc_area_plot)